For this Seurat analysis, we used the following parameters:
pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars
Starting with this Seurat object from Mireille.
path <- "/fast/work/groups/cubi/milekm/mireille/de/fracture-healing-and-aging-scseq/DE_analysis/miha"
sobj <- readRDS(file.path(path,params$path_to_object))
selected_cell_types <- sobj[[]] %>%
pull(cluster_id) %>%
unique()
sample <- sobj[[]] %>%
select(group,mouse) %>%
mutate(sample=paste0(group,"_",mouse)) %>%
select(sample)
sobj <- AddMetaData(sobj, metadata = sample)
# cluster_metadata_3 <- sobj[[]] %>%
# mutate(age = str_extract(group, "^[0-9]+w")) %>%
# mutate(strain = str_extract(group, "w(E|SPF|)")) %>%
# mutate(time = str_extract(group, "[0-9]+days")) %>%
# mutate(arm = paste(age, strain, time, sep = "_")) %>%
# group_by(arm) %>%
# mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse)))) %>%
# select(sample, cell_type, group, mouse, age, location, time, strain,pseudoID) %>%
# distinct()
expr <- list()
for (cell_type in selected_cell_types) {
for (sample in unique(sobj[[]]$sample)) {
# sample <- unique(sobj[[]]$sample)[1]
# cell_type <- selected_cell_types[1]
cells <- Cells(sobj)[(sobj@meta.data$sample==sample & sobj$cluster_id==cell_type ) ]
cells <- cells[!is.na(cells)]
if (length(cells) > 1) {
expr[[paste0(cell_type,';',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}
}
}
for (sample in unique(sobj[[]]$sample)) {
cells <- Cells(sobj)[(sobj@meta.data$sample==sample )]
cells <- cells[!is.na(cells)]
expr[[paste0('all;',sample)]] <- rowSums(sobj@assays$RNA@counts[,cells])
}
sample_name <- sub(".*-","",sub(";.*","", sub(';','-',names(expr))))
colData<-data.frame(sample=names(expr),
cell_type=factor(sub(';.*','',names(expr))),
sample_name=sample_name,
group=sub("_11.*","",sub(".*;","",names(expr))),
mouse=sub('.*s_11','11',names(expr)) ,
age=sub("w.*","w", sample_name),
location=sub("_.*", "", sub(".*[wEF]","", sample_name )),
time = sub(".*_", "", sub("days.*", "days", sample_name)),
strain = ifelse(grepl("12w",sample_name), "young",
ifelse(grepl("SPF", sample_name), "aged",
ifelse(grepl("wE", sample_name),"immuno", NA))))
colData <- colData %>%
mutate(age_time_strain = paste0(age, "_", time, "_", strain)) %>%
group_by(age_time_strain) %>%
mutate(pseudoID = sprintf("ID%02d", as.numeric(as.factor(mouse))))
counts <- do.call(cbind, expr)
# write.table(counts, "all_counts_over_cell_types_expanded_unique.tsv", quote = F, sep ="\t", row.names=T)
Get number of reads per sample.
lengths <- colSums(counts)
df_lengths <- data.frame(sample = names(lengths),
n_reads = lengths) %>%
mutate(group = sub("_11.*","",sub(".*;", "",sample)),
celltype = sub(";.*", "", sample))
ggplot(df_lengths, aes(celltype,n_reads,fill=group))+
geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2) +
facet_wrap(~celltype, scales="free")
First, we run DESeq2 by including the pseudoID in the model. Here we assume that the age_time_location of the mouse is replicated in triplicate (pseudoID).
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
"26wEfx_5days_vs_26wEfx_2days",
"26wSPFfx_5days_vs_26wSPFfx_2days")
res <- list()
for (celltype in selected_cell_types){
coldata_df <- colData %>%
dplyr::filter(cell_type %in% celltype)
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df)) != 0){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~pseudoID + group)
contrasts <- selected_contrasts
sub_res <- list()
for (comparison in contrasts){
perturbation <- sub("_vs_.*", "", comparison)
reference <- sub(".*_vs_", "", comparison)
dds$group <- relevel(dds$group, ref = reference)
if (celltype != "all" & celltype != "all_cells"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
if(params$shrinkage_type=="apeglm"){
sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
} else {
sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
}
}
}
res[[celltype]] <- do.call("rbind", sub_res) %>%
tibble::remove_rownames()
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(!is.na(padj), contrast == comparison) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison,padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
print(p)
}
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
Repeat this without pseudoID in the model. So only ~group.
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all","Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
selected_contrasts <- c("12wfx_5days_vs_12wfx_2days",
"26wEfx_5days_vs_26wEfx_2days",
"26wSPFfx_5days_vs_26wSPFfx_2days")
res <- list()
for (celltype in selected_cell_types){
coldata_df <- colData %>%
dplyr::filter(cell_type %in% celltype)
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df)) != 0){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~group)
contrasts <- selected_contrasts
sub_res <- list()
for (comparison in contrasts){
perturbation <- sub("_vs_.*", "", comparison)
reference <- sub(".*_vs_", "", comparison)
dds$group <- relevel(dds$group, ref = reference)
if (celltype != "all" & celltype != "all_cells"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
if(params$shrinkage_type=="apeglm"){
sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
} else {
sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
}
}
}
res[[celltype]] <- do.call("rbind", sub_res) %>%
tibble::remove_rownames()
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(!is.na(padj), contrast == comparison) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison,padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
print(p)
}
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
Here, we run DESeq2 by including the mouse ID in the model (not by pseudoID but by actual mouse ID). For this, we have to select the subset of the mice by age_time_strain, so that the model can be full rank.
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
age_time_strain <- unique(colData$age_time_strain)
res <- list()
for (which_mice in age_time_strain){
for (celltype in selected_cell_types){
coldata_df <- colData %>%
dplyr::filter(cell_type %in% celltype, age_time_strain == which_mice)
counts_df <- subset(counts, select = coldata_df$sample)
which_groups <- unique(coldata_df$group)
contrasts <- expand_grid(perturbation=which_groups, reference=which_groups) %>%
filter(perturbation < reference) %>%
mutate(contrast = paste0(perturbation,"_vs_",reference)) %>%
pull(contrast)
sub_res <- list()
for (comparison in contrasts){
perturbation <- sub("_vs_.*", "", comparison)
reference <- sub(".*_vs_", "", comparison)
if (sum(colSums(counts_df)) != 0){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~mouse + group)
dds$group <- relevel(dds$group, ref = reference)
if (celltype != "all" & celltype != "all_cells"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
}
if(params$shrinkage_type=="apeglm"){
sub_res[[comparison]] <- lfcShrink(dds, coef=paste0("group_",comparison), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
} else {
sub_res[[comparison]] <- lfcShrink(dds, contrast=c("group",perturbation, reference), type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = comparison)
}
}
res[[celltype]] <- do.call("rbind", sub_res) %>%
tibble::remove_rownames()
}
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(!is.na(padj), contrast == comparison) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison,padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
print(p)
}
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# # write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
Fourth approach is to run all contrasts separately and put only group into the model. We compare these results.
res <- list()
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "12wfx_5days"
condition_reference <- "12wfx_2days"
for (celltype in selected_cell_types){
# celltype<-"B Cells"
coldata_df <- colData %>%
dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~group)
dds$group <- relevel(dds$group, ref = condition_reference)
if (celltype != "all"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
}
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
res_df %>%
dplyr::filter(!is.na(padj)) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(res_df$contrast)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
res <- list()
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all", "Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "26wEfx_5days"
condition_reference <- "26wEfx_2days"
for (celltype in selected_cell_types){
# celltype<-"B Cells"
coldata_df <- colData %>%
dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~group)
dds$group <- relevel(dds$group, ref = condition_reference)
if (celltype != "all"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
}
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
res_df %>%
dplyr::filter(!is.na(padj)) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(res_df$contrast)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
res <- list()
# selected_cell_types <- c("Monocytes / Macrophages", "Neutrophils", "Myeloid Precursor", "T Cells", "B Cells", "Dendritic Cells", "all")
selected_cell_types <- c("all","Myeloid Precursor", "B Cells","Neutrophils", "Monocytes / Macrophages")
condition_perturbation <- "26wSPFfx_5days"
condition_reference <- "26wSPFfx_2days"
for (celltype in selected_cell_types){
# celltype<-"B Cells"
coldata_df <- colData %>%
dplyr::filter(cell_type == celltype & (group ==condition_perturbation | group == condition_reference) )
counts_df <- subset(counts, select = coldata_df$sample)
if (sum(colSums(counts_df) != 0) & length(unique(coldata_df$group)) == 2){
dds <- DESeqDataSetFromMatrix(countData=as.matrix(counts_df),
colData=coldata_df,
design=~group)
dds$group <- relevel(dds$group, ref = condition_reference)
if (celltype != "all"){
dds <- estimateSizeFactors(dds, type='poscounts')
dds <- DESeq(dds, test = 'LRT', reduced = ~1, useT=TRUE, minmu=1e-6, minReplicatesForReplace=Inf)
} else {
dds <- estimateSizeFactors(dds)
dds <- DESeq(dds)
}
res[[celltype]] <- lfcShrink(dds, coef=2, type=params$shrinkage_type, format="DataFrame") %>%
as.data.frame() %>%
tibble::rownames_to_column('gene_name') %>%
mutate(cell_type=celltype, contrast = resultsNames(dds)[2])
}
}
res_df <- do.call("rbind", res) %>%
tibble::remove_rownames()
res_df %>%
dplyr::filter(!is.na(padj)) %>%
ggplot(aes(x=log2FoldChange,y=-log10(pvalue),color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=40,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(res_df$contrast)+
facet_wrap(~cell_type, ncol = 2, scales = "free")
for (comparison in unique(res_df$contrast)){
p <- res_df %>%
dplyr::filter(contrast == comparison) %>%
ggplot(aes(x=baseMean,y=log2FoldChange,color=padj < .05)) +
geom_point(size=.5, shape=1) +
geom_label_repel(data=res_df %>%
dplyr::filter(contrast == comparison, padj < .05),
aes(label=gene_name),
segment.size=.25,segment.alpha=.5,size=3,color='black',max.overlaps=30,
label.padding=0.05,label.size=NA) +
scale_color_manual(values=c('black','red')) +
scale_x_continuous(trans='log10') +
coord_cartesian(ylim =c(-4,4)) +
theme_classic() +
theme(legend.position='none',
strip.background=element_blank())+
ggtitle(comparison)+
facet_wrap(~cell_type, ncol = 2)
print(p)
}
res_df %>%
dplyr::arrange(padj) %>%
dplyr::filter(!is.na(padj) & (padj < .05)) %>%
dplyr::mutate_if(is.numeric, signif, 2) %>%
DT::datatable(rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# write.table(res_df,paste0("de_",condition_perturbation,"_vs_",condition_reference,".tsv"), quote=F, sep="\t",row.names = F)
Next, we show enrichment of transcriptional modules.
# library(tmod)
#
# genes <- lapply(res, function(x) x %>%
# filter(!is.na(padj)) %>%
# arrange(pvalue))
# lgenes <- lapply(genes, function(x) x$gene_name)
#
# df2tmod <- function(df, gene_id_col=ncol(df), module_id_col=1, module_title_col=2) {
#
# require(tmod, quietly=TRUE)
#
# gene_ids <- df[[gene_id_col]]
# module_ids <- df[[module_id_col]]
# m2g <- tapply(gene_ids, module_ids, list)
# df[[gene_id_col]] <- NULL
# df <- df[!duplicated(df[[module_id_col]]), ]
# colnames(df)[module_id_col] <- "ID"
# colnames(df)[module_title_col] <- "Title"
#
# makeTmod(modules=df, modules2genes=m2g)
# }
#
# df <- as.data.frame(msigdbr::msigdbr(species='Mus musculus'))
# df <- df[ , c("gs_name", "gs_id", "gs_cat", "gs_subcat", "gene_symbol") ]
# colnames(df) <- c("Title", "ID", "Category", "Subcategory", "GeneID")
# msig <- df2tmod(df[!is.na(df$GeneID),], gene_id_col=ncol(df), module_id_col=2, module_title_col=1)
# sel <- (msig$gs$Category %in% c("H")) | (msig$gs$Subcategory %in% c('CP:REACTOME','GO:BP','CP:KEGG'))
#
# res.tmod <- lapply(lgenes, tmodCERNOtest, mset = msig[sel])
#
# res.tmod.filtered <- lapply(res.tmod, function(x) subset(x, adj.P.Val < 1e-10 & AUC > .7))
#
# pie <- lapply(genes, function(x) tmodDecideTests(x$gene_name, lfc = x$log2FoldChange,
# pval = x$pvalue, lfc.thr=0.1, mset = msig[sel]))
# pie <- lapply(pie, as.data.frame)
#
# tmodPanelPlot(res.tmod.filtered, text.cex = .8, pie = pie, pie.style="rug", grid="at")
#
#
# display_terms <- function(df){
# if (nrow(res.tmod.filtered[[df]]) >0){
# res.tmod.filtered[[df]]$cell_type <- df
# res.tmod.filtered[[df]] %>%
# dplyr::arrange(adj.P.Val) %>%
# dplyr::mutate_if(is.numeric, signif, 2) %>%
# DT::datatable(caption=df,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# }
# }
#
#
# lapply(names(res.tmod.filtered), display_terms )
#
#
# for (cell_type in names(res.tmod.filtered)){
# # cell_type <- "all_cells"
# res.tmod.filtered[[cell_type]] %>%
# dplyr::select(ID,Title,AUC,adj.P.Val) %>%
# dplyr::mutate_if(is.numeric, signif,2) %>%
# DT::datatable(caption=cell_type, rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# }
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
##
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart-deseq/lib/libopenblasp-r0.3.25.so; LAPACK version 3.11.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=de_DE.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=de_DE.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=de_DE.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=de_DE.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Berlin
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] stringr_1.5.1 ggrepel_0.9.4
## [3] DESeq2_1.40.2 SummarizedExperiment_1.30.2
## [5] Biobase_2.60.0 MatrixGenerics_1.12.2
## [7] matrixStats_1.2.0 GenomicRanges_1.52.0
## [9] GenomeInfoDb_1.36.1 IRanges_2.34.1
## [11] S4Vectors_0.38.1 BiocGenerics_0.46.0
## [13] cowplot_1.1.1 ggplot2_3.4.4
## [15] gtools_3.9.5 tidyr_1.3.0
## [17] dplyr_1.1.4 SeuratObject_5.0.1
## [19] Seurat_4.4.0
##
## loaded via a namespace (and not attached):
## [1] RColorBrewer_1.1-3 jsonlite_1.8.8 magrittr_2.0.3
## [4] spatstat.utils_3.0-4 farver_2.1.1 rmarkdown_2.25
## [7] zlibbioc_1.46.0 vctrs_0.6.5 ROCR_1.0-11
## [10] spatstat.explore_3.2-5 RCurl_1.98-1.13 SQUAREM_2021.1
## [13] mixsqp_0.3-48 S4Arrays_1.0.4 htmltools_0.5.7
## [16] truncnorm_1.0-9 sass_0.4.8 sctransform_0.4.1
## [19] parallelly_1.36.0 KernSmooth_2.23-22 bslib_0.6.1
## [22] htmlwidgets_1.6.4 ica_1.0-3 plyr_1.8.9
## [25] plotly_4.10.3 zoo_1.8-12 cachem_1.0.8
## [28] igraph_1.5.1 mime_0.12 lifecycle_1.0.4
## [31] pkgconfig_2.0.3 Matrix_1.6-4 R6_2.5.1
## [34] fastmap_1.1.1 GenomeInfoDbData_1.2.11 fitdistrplus_1.1-11
## [37] future_1.33.0 shiny_1.8.0 digest_0.6.33
## [40] colorspace_2.1-0 patchwork_1.1.3 tensor_1.5
## [43] irlba_2.3.5.1 crosstalk_1.2.1 invgamma_1.1
## [46] labeling_0.4.3 progressr_0.14.0 fansi_1.0.6
## [49] spatstat.sparse_3.0-3 httr_1.4.7 polyclip_1.10-6
## [52] abind_1.4-5 compiler_4.3.2 withr_2.5.2
## [55] BiocParallel_1.34.2 highr_0.10 MASS_7.3-60
## [58] DelayedArray_0.26.6 tools_4.3.2 lmtest_0.9-40
## [61] httpuv_1.6.13 future.apply_1.11.0 goftest_1.2-3
## [64] glue_1.6.2 nlme_3.1-164 promises_1.2.1
## [67] grid_4.3.2 Rtsne_0.17 cluster_2.1.6
## [70] reshape2_1.4.4 generics_0.1.3 gtable_0.3.4
## [73] spatstat.data_3.0-3 data.table_1.14.10 XVector_0.40.0
## [76] sp_2.1-2 utf8_1.2.4 spatstat.geom_3.2-7
## [79] RcppAnnoy_0.0.21 RANN_2.6.1 pillar_1.9.0
## [82] spam_2.10-0 later_1.3.2 splines_4.3.2
## [85] lattice_0.22-5 survival_3.5-7 deldir_2.0-2
## [88] tidyselect_1.2.0 locfit_1.5-9.8 miniUI_0.1.1.1
## [91] pbapply_1.7-2 knitr_1.45 gridExtra_2.3
## [94] scattermore_1.2 xfun_0.41 DT_0.31
## [97] stringi_1.8.3 lazyeval_0.2.2 yaml_2.3.8
## [100] evaluate_0.23 codetools_0.2-19 tibble_3.2.1
## [103] cli_3.6.2 uwot_0.1.16 xtable_1.8-4
## [106] reticulate_1.34.0 munsell_0.5.0 jquerylib_0.1.4
## [109] Rcpp_1.0.11 globals_0.16.2 spatstat.random_3.2-2
## [112] png_0.1-8 parallel_4.3.2 ellipsis_0.3.2
## [115] dotCall64_1.1-1 bitops_1.0-7 listenv_0.9.0
## [118] viridisLite_0.4.2 scales_1.3.0 ggridges_0.5.4
## [121] crayon_1.5.2 leiden_0.4.3.1 purrr_1.0.2
## [124] rlang_1.1.2 ashr_2.2-63